masterData <- read_csv("analysis_data.csv")

library(stargazer);library(sjPlot);library(gridExtra);library(ggrepel)
library(arm); library(scales);library(lme4); library(dplyr)


set.seed(123)




# a function to get the modal value of any vector
getmode <- function(v) {
  uniqv <- unique(v)
  uniqv[which.max(tabulate(match(v, uniqv)))]
}



# getting the useful model data

model_data_model<-glmer(direction ~ scIdeolScore+ lcDispositionDirection +lcDissent+
                             practiceSpecEconomic +practiceSpecCriminal+practiceSpecPublic+
                             practiceSpecCivil+practiceSpecCommon+PriorExp+female+
                             (1|justice),
                           data=masterData[masterData$unan==0,], family = 'binomial',
                           control=glmerControl(optimizer="bobyqa",optCtrl=list(maxfun=2e5)))

model_data<- model_data_model@frame


library(rethinking)





model_data$justice<- as.character(model_data$justice)

model_sum<-model_data%>%group_by(justice)%>%
  summarise(scIdeolScore=getmode(scIdeolScore),
            lcDispositionDirection   = mean(as.numeric(lcDispositionDirection=='liberal')),
            lcDissent = mean(lcDissent),
            practiceSpecEconomic = getmode(practiceSpecEconomic), 
            practiceSpecCriminal = getmode(practiceSpecCriminal), 
            practiceSpecPublic = getmode(practiceSpecPublic), 
            practiceSpecCivil = getmode(practiceSpecCivil), 
            practiceSpecCommon = getmode(practiceSpecCommon), 
            female = getmode(female), 
            PriorExp = getmode(PriorExp),
            meanDirection= mean(direction),
            numberCases= n(), 
            numberSuccesses = sum(direction))







model_sum$initials<-judge_data$justiceInitials[1:21]
model_sum$appPMParty<-judge_data$appPM[1:21]

model_sum$appPMParty[model_sum$appPMParty==2]<- "coalition"
model_sum$appPMParty[model_sum$appPMParty==1]<- "alp"


n_ideology_Obs<-rev(c(17,24,12,63,15,58,160,68,85,120,65,129,43,63,66,28,31,31,10,13,21))

model_sum$n_ideology_Obs<- n_ideology_Obs

model_sum$ideology_SD <- sqrt((model_sum$scIdeolScore*(1-model_sum$scIdeolScore))/model_sum$n_ideology_Obs)





## validating the application of the standard deviation of the proportion with bootstrapping
namesJustices<- judge_data$justiceName[21:1]


library(readr);library(tidyr)


justice_ideology_content_analysis_final <- read_csv("Inputs/justice_ideology_content_analysis_final.csv")

wide<-justice_ideology_content_analysis_final %>% select(articleNo:P72)


longCode<- wide%>%gather(paragraph,score, P1:P72, factor_key=TRUE)


longCode$score[longCode$score>=4]<-NA

data<-longCode[!is.na(longCode$score),]
data<- data[data$nominee<=53,]

# removing lower court reporting for Michelle Gordon, whose reporting in right leaning papers
# was focused on a sensationalised account of one lower court case, making her appear more liberal
# than typical profile reporting did.

data<- data[data$score<4,]

data<- data[data$highCourtArticle!=3,]

bootstrapped_ideology<- matrix(NA, ncol=21,nrow=10000)
simple_ideology <- matrix(33:53, ncol = 3, nrow=21)

Moderates<- data.frame(name = namesJustices, propModerates=rep(NA, 21))

for (j in 1:21){
  
  dat<- subset(data, data$nominee==j+32)
  
  
  C <- length(dat$score[dat$score==1])
  M <- length(dat$score[dat$score==2])
  L <- length(dat$score[dat$score==3])
  
  simple_ideology[j, 2] <- (1+(L-C)/(L+C+M))/2
  simple_ideology[j, 3] <-length(dat$score)
  
  Moderates$propModerates[j]<-M / (M+C+M)
 
  for (i in 1:10000){
    
    boot<-sample(dat$score, nrow(dat), replace = TRUE)
    
    #smoothing the bootstrap inputs from the data...
    C <- length(boot[boot==1])+rnorm(1, mean=0,sd=0.5)
    M <- length(boot[boot==2])+rnorm(1, mean=0,sd=0.5)
    L <- length(boot[boot==3])+rnorm(1, mean=0,sd=0.5)
    
    bootstrapped_ideology[i, j]<-  (1+(L-C)/(L+C+M))/2
    
  }
}





bootstrapped_ideology<-as.data.frame(bootstrapped_ideology)
names(bootstrapped_ideology)<- paste("justice", 33:53)

#checking the coverage of the bootstrap...

for (i in 1:21){
  
  coverage<- sum(duplicated(bootstrapped_ideology[,i]))/nrow(bootstrapped_ideology) *100
  
  print(paste0("Bootstraps for Justice ", namesJustices[i], 
               " (with ",model_sum$n_ideology_Obs[i], " articles) are ", 
               coverage, "% duplicated"))
}




#looking at the bootstrapped distributions...



par(mfrow=c(3,7))
par(mfrow=c(1,1))

for (i in 1:21){
  hist(bootstrapped_ideology[,i], xlab ="", ylab = "",xlim=c(0,1),
       main = paste0(namesJustices[i], " (# articles: ", model_sum$n_ideology_Obs[i], ")")
  )
  print(summary(bootstrapped_ideology[,i]))
}





bootsum<-bootstrapped_ideology%>% gather(justice, bootscore) %>% 
  group_by(justice) %>% summarise(meanBoot = mean(bootscore), 
                                  sdBoot = sd(bootscore))




simple_ideology<- as.data.frame(simple_ideology)

names(simple_ideology)[2]<- "simple_ideology"
names(simple_ideology)[3]<- "nArticles"

comparison<-bind_cols(model_sum, bootsum, simple_ideology)[,c(1:2,18 , 20:21, 23, 24)]


comparison$proportion_SD <- sqrt((comparison$simple_ideology*(1-comparison$simple_ideology))/comparison$nArticles)


comparison$diffSD<- comparison$sdBoot - comparison$proportion_SD 


#resampling the bootstraps for mean and standard deviation. 


# resampled_bootstrap_mean<-matrix(nrow = 1000, ncol=21)
# resampled_bootstrap_sd<-matrix(nrow = 1000, ncol=21)
# 
# for (i in 1:21){
#   resampled_bootstrap_mean[,i]<- bootstrap::bootstrap(x=bootstrapped_ideology[,i],nboot = 1000, mean)$thetastar
#   resampled_bootstrap_sd[,i]<- bootstrap::bootstrap(x=bootstrapped_ideology[,i],nboot = 1000, sd)$thetastar
#   print(namesJustices[i])
# }





#adding the resampled bootstrap estimates to the comparison df...
# 
# comparison$boot_mean_resampled<-apply(resampled_bootstrap_mean, 2,mean)
# comparison$boot_sd_resampled<-apply(resampled_bootstrap_sd, 2,mean)
# 

  
ggplot(comparison, aes(sdBoot, proportion_SD))+geom_point()+ scale_x_continuous(limits=c(0,0.2)) +
  scale_y_continuous(limits=c(0,0.2)) + geom_abline(intercept = 0, slope = 1) + 
  annotate("text", x = 0.15, y = 0.025, label ="bootstrap SD > proportion SD") + 
  annotate("text", x = 0.05, y = 0.15, label ="proportion SD > bootstrap SD") +
  theme_bw() +
  xlab("Bootstrapped SD") + ylab("Proportion SD") +
  ggtitle("Comparing Standard Deviations for Justice Ideology Scores")

  # geom_text_repel(aes(sdBoot, proportion_SD,
  #                     label=paste0(Moderates$name,", p=", round(comparison$meanBoot,2),". n=" ,model_sum$n_ideology_Obs , ". prop(moderates)=", round(Moderates$propModerates, 2) )),
  #                 nudge_x=0, show.legend = F)
  



# comparing original measures of ideology with the bootstrapped measures

ggplot(comparison, aes(meanBoot, simple_ideology ))+geom_point() +
  xlab("Bootstrapped Ideology") + ylab("Conventionally calculated ideology")  +
  theme_bw() +  scale_x_continuous(limits=c(0,1)) +
  scale_y_continuous(limits=c(0,1)) + geom_abline(slope=1)




  
# Table 6, Bootstrapped measures columns

View(cbind(namesJustices, model_sum$n_ideology_Obs , comparison$simple_ideology, bootsum$sdBoot))




